import requests
from lxml import etree
from bs4 import BeautifulSoup

# Sample document used by every lookup below; kept inline so the demo is
# self-contained and needs no network access.
html = '''
    <html>
        <head>
            <title>网页标题</title>
        </head>
        <body>
            <h1 class="h1_row" id="h1">网页内容标题</h1>
            <div id="b1" class="div_text">div标签内容1</div>
            <div id="b2" class="div_text"><span>span标签内容</span>div标签内容2</div>
            <div id="b3" class="div_text"><i>i标签内容</i>div标签内容3</div>
            <div id="b4" class="div_text">div标签内容4</div>
        </body>
    </html>
'''

# Parse the markup with the lxml backend.
bs = BeautifulSoup(html, 'lxml')
print(bs.title)

# The tree is never modified, so the <h1> tag can be looked up once and reused.
heading = bs.h1

# All attributes of the tag.
print(heading.attrs)

# A single attribute value, via .get() and via subscript access.
print(heading.get('class'))
print(heading['class'])

# Text content of the tag, via .string and via .text.
print(heading.string)
print(heading.text)

# Searching: find() gives the first match, find_all() gives every match.
print(bs.find('div', class_='div_text'))
div_matches = bs.find_all('div', class_='div_text')
print(div_matches)

# Show the type of each element returned by find_all().
for div_tag in div_matches:
    print(type(div_tag))

# CSS selectors: by id, by class, and by direct-child combinator.
for css_query in ('#b1', '.div_text', 'div>i', 'div>span'):
    print(bs.select(css_query))
